<html>
<body>
This script extracts the agencies from the 211 website and saves them to a temporary file.

You must update the Taxonomies before running this script.  The script <A href="extract_taxonomies.php">extract_taxonomies</A> is used to update the list of taxonomies.

After running this script you must run the <A href="load_extracts.php">load_extracts</A> script to update the database.
<?php 

// ---------------------------------------------------------------------------
// Configuration and runtime setup for the agency extraction run.
// ---------------------------------------------------------------------------

// Root of the 211 site; the relative links read from the taxonomy file are
// appended to this value.
$base_URL = "http://211online.unitedwayatlanta.org/";
$agency_URL = $base_URL."KeywordList.aspx?;;0;;N;0;505022;;";
$taxonomy_link_regex_start = preg_quote ("href=\"MatchList.aspx*>");
$attr_html_start = "MatchList.aspx?k;;0;;N;0;505022;";
// Pages containing this text are skipped by the fetch loop.
$agency_page_skip_hint = "Sorry, we were unable to find an agency or program in our database that offers";
// Script time limit in seconds, handed to set_time_limit() below.
$max_runtime = 6000;
$agency_delim = "<div class=\\\"ml_name\\\">";
$agency_loc_delim = "<div class=\\\"ml_name\\\"><div class=\\\"ml_location\\\">";
$agency_end_delim = "<\/div>";
$agency_end_loc_delim = "<\/div><\/div>";
// Output file written by this script, and input file that lists the taxonomies.
$agency_file = "extracted_agencies.csv";
$taxonomy_file = "extracted_taxonomies.csv";
$csv_delim = ",";
// Start with genuinely empty lists.  Writing "$list[] = array()" would append
// an empty array as element 0 instead of creating an empty list.
$agency_url_list = array();
$taxonomy_list = array();
$agency_list = array();

echo "Maximum script runtime set to ".$max_runtime." seconds.<BR>";
set_time_limit($max_runtime);
// error_reporting() expects an integer bitmask of E_* constants, so the string
// 'IGNORE_REPEATED_ERRORS' is not a valid argument (it raises a TypeError on
// PHP 8).  "ignore_repeated_errors" is an ini directive; enable it as such.
ini_set('ignore_repeated_errors', '1');


// Load the taxonomy extract ($taxonomy_file), one record per line, with fields
// separated by $csv_delim.  Two parallel lists are built, indexed 0 .. $i-1:
//   $taxonomy_list[n]   - the first field of the line
//   $agency_url_list[n] - $base_URL followed by the second field of the line
// $i ends up as the number of records loaded; the code further down reads it,
// so it is defined even when the file cannot be opened.
$agency_url_list = array();   // hold only rows that actually come from the file
$taxonomy_list = array();
$i = 0;

$handle = @fopen($taxonomy_file, "r");
if ($handle) {
    // fgets() returns false at end of file, which ends the loop.
    while (($bufferin = fgets($handle)) !== false) {
        $buftokin = explode($csv_delim, $bufferin);

        // Blank lines and lines without a second field carry no URL.  Skip
        // them rather than reading an offset that does not exist.
        if (!isset($buftokin[1]) || strlen(trim($buftokin[1])) == 0) {
            continue;
        }

        $agency_url_list[$i] = $base_URL.$buftokin[1];
        $taxonomy_list[$i] = $buftokin[0];
        $i++;
    }

    fclose($handle);
} else {
    // Report the problem instead of silently continuing with nothing to do.
    echo "Unable to open ".$taxonomy_file.".<BR>";
}
//$testurl = "http://www.google.com/";
//$testurl ="http://211online.unitedwayatlanta.org/MatchList.aspx?k;;0;;N;0;505022;Academic%20Competitions";
//if (file_exists(trim($testurl))) {echo "test file exists<BR>";}else{echo "test file doesn't exist<BR>";}
//$httpgetresponse = @fopen($testurl, "r");
//if($httpgetresponse){echo "null handle<BR>";}else{echo "good handle<BR>";}
//if(is_readable($testurl)){echo "readable<BR>";}else{echo "notreadable<BR>";}
//echo "<BR><HR><BR>";
//echo file_get_contents($testurl);
//echo "<BR><HR><BR>";
//$i = 10;
//error_reporting(E_ALL);
// Download every taxonomy result page and collect the names found on it.
//
// Pages that contain $agency_page_skip_hint are skipped.  On every other page
// the text of each "ml_name" and each "ml_location" div that directly follows
// an "ml_data" div is captured.  Every preg_match_all() result (index 0 = full
// matches, index 1 = captured text) is appended to $agency_array, which the
// output step after this loop walks.

// Characters accepted inside a name: word characters, whitespace, and the
// punctuation | - / , ' & : .
// The hyphen is escaped on purpose: an unescaped "-" right after \s and before
// another character is rejected by PCRE2 (PHP 7.3 and later) as an invalid
// range, which made both patterns fail to compile.
$pattern1 = '/<div class="ml_data"><div class="ml_name">([\w\s|\-\/,\'&:.]+)</';
$pattern2 = '/<div class="ml_data"><div class="ml_location">([\w\s|\-\/,\'&:.]+)</';

$agency_array = array();

// $i counts the URLs that were loaded, so valid indexes are 0 .. $i-1.
$url_total = isset($i) ? (int) $i : 0;

for ($j = 0; $j < $url_total; $j++) {
    if (!isset($agency_url_list[$j]) || !is_string($agency_url_list[$j])) {
        continue;
    }

    // The URLs come straight from the CSV file: strip the line ending and
    // encode embedded spaces so the request is valid.
    $tempurl = preg_replace("/ /", "%20", trim($agency_url_list[$j]));
    echo "working on: ".$tempurl."<BR>";

    $urldata = file_get_contents($tempurl);
    if ($urldata === false) {
        echo "unable to read: ".$tempurl."<BR>";
        continue;
    }
    if (strlen(trim($urldata)) == 0) {
        continue;
    }

    // A plain substring test is enough here; the hint is literal text.
    if (strpos($urldata, $agency_page_skip_hint) !== false) {
        echo "skip hint found: ".$tempurl."<BR>";
        continue;
    }

    foreach (array($pattern1, $pattern2) as $pattern) {
        $matches = array();
        // preg_match_all() returns false only on error; a page with zero
        // matches still contributes an (empty) match set.
        if (preg_match_all($pattern, $urldata, $matches) !== false) {
            $agency_array[] = $matches;
        }
    }
}


// Write every captured name to $agency_file, one per line terminated by CRLF,
// and report how many were written.
//
// $agency_array is a list of preg_match_all() results: each entry holds the
// full matches at index 0 and the captured text at index 1.

// Nothing may have been collected (no URLs, or every page skipped); treat that
// as an empty list instead of sorting and iterating an undefined variable.
if (!isset($agency_array) || !is_array($agency_array)) {
    $agency_array = array();
}
// NOTE(review): this orders the match sets relative to each other; it does not
// alphabetise the individual names.
sort($agency_array);

$agency_count = 0;
$fp = fopen($agency_file, 'w');
if ($fp === false) {
    echo "unable to open ".$agency_file." for writing<BR>";
} else {
    foreach ($agency_array as $match_set) {
        if (!is_array($match_set)) {
            continue;
        }
        foreach ($match_set as $group) {
            if (!is_array($group)) {
                continue;
            }
            foreach ($group as $name) {
                // Full matches start with the "<div" markup; only the captured
                // text (which never starts with "<") is written out.
                if ((strlen($name) > 0) && (substr($name, 0, 1) != "<")) {
                    $agency_count++;
                    fwrite($fp, $name."\r\n");
                }
            }
        }
    }
    echo "agencycount:".$agency_count."; from: ".(isset($i) ? $i : 0)." URLs<BR>";

    fclose($fp);
    echo "saved to ".$agency_file."<BR>";
}


?><BR>

</body>
</html>